{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "df = pd.read_csv(\n",
    "    \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n",
    "    header=None)\n",
    "df.columns = [\n",
    "    \"Age\", \"WorkClass\", \"fnlwgt\", \"Education\", \"EducationNum\",\n",
    "    \"MaritalStatus\", \"Occupation\", \"Relationship\", \"Race\", \"Gender\",\n",
    "    \"CapitalGain\", \"CapitalLoss\", \"HoursPerWeek\", \"NativeCountry\", \"Income\"\n",
    "]\n",
    "\n",
    "train_cols = df.columns[0:-1]\n",
    "label = df.columns[-1]\n",
    "X = df[train_cols]\n",
    "y = df[label].apply(lambda x: 0 if x == \" <=50K\" else 1) #Turning response into 0 and 1\n",
    "\n",
    "seed = 42\n",
    "np.random.seed(seed)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit and compare DP-EBM vs. standard EBM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.privacy import DPExplainableBoostingClassifier\n",
    "from interpret.glassbox import ExplainableBoostingClassifier\n",
    "import time\n",
    "from sklearn.metrics import roc_auc_score, accuracy_score\n",
    "\n",
    "start = time.time()\n",
    "dpebm = DPExplainableBoostingClassifier(epsilon=1, delta=1e-6)\n",
    "dpebm.fit(X_train, y_train)\n",
    "\n",
    "dp_auroc = roc_auc_score(y_test, dpebm.predict_proba(X_test)[:, 1])\n",
    "end = time.time()\n",
    "\n",
    "print(f\"DP EBM with eps: {dpebm.epsilon} and delta: {dpebm.delta} trained in {end - start:.2f} seconds with a test AUC of {dp_auroc:.3f}\")\n",
    "\n",
    "\n",
    "start = time.time()\n",
    "ebm = ExplainableBoostingClassifier(random_state=seed)\n",
    "ebm.fit(X_train, y_train)\n",
    "\n",
    "ebm_auroc = roc_auc_score(y_test, ebm.predict_proba(X_test)[:, 1])\n",
    "end = time.time()\n",
    "print(f\"EBM trained in {end - start:.2f} seconds with a test AUC of {ebm_auroc:.3f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## See differences in learned shape functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret import show\n",
    "\n",
    "show(ebm.explain_global(name='Standard EBM'))\n",
    "show(dpebm.explain_global(name='DP EBM'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
